{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.10.0\n",
      "Training entries: 25000, labels: 25000\n",
      "[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]\n"
     ]
    },
    {
     "data": {
      "text/plain": "(218, 189)"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "import numpy as np\n",
    "print(tf.__version__)\n",
    "imdb = keras.datasets.imdb\n",
    "(train_data, train_labels), (test_data, test_labels) = imdb.load_data('train。csv',num_words=15000)\n",
    "print(\"Training entries: {}, labels: {}\".format(len(train_data), len(train_labels)))\n",
    "print(train_data[0])\n",
    "len(train_data[0]), len(train_data[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json\n",
      "1641221/1641221 [==============================] - 1s 0us/step\n"
     ]
    },
    {
     "data": {
      "text/plain": "\"<START> shown in australia as <UNK> this incredibly bad movie is so bad that you become <UNK> and have to watch it to the end just to see if it could get any worse and it does the storyline is so predictable it seems written by a high school <UNK> class the sets are pathetic but marginally better than the <UNK> and the acting is wooden br br the infant <UNK> seems to have been stolen from the props <UNK> of <UNK> <UNK> there didn't seem to be a single original idea in the whole movie br br i found this movie to be so bad that i laughed most of the way through br br malcolm mcdowell should hang his head in shame he obviously needed the money\""
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A dictionary mapping words to an integer index\n",
    "word_index = imdb.get_word_index()\n",
    "\n",
    "# The first indices are reserved\n",
    "word_index = {k:(v+3) for k,v in word_index.items()}\n",
    "word_index[\"<PAD>\"] = 0\n",
    "word_index[\"<START>\"] = 1\n",
    "word_index[\"<UNK>\"] = 2  # unknown\n",
    "word_index[\"<UNUSED>\"] = 3\n",
    "\n",
    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n",
    "\n",
    "def decode_review(text):\n",
    "    return ' '.join([reverse_word_index.get(i, '?') for i in text])\n",
    "decode_review(train_data[20])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[    1    14    22    16    43   530   973  1622  1385    65   458  4468\n",
      "    66  3941     4   173    36   256     5    25   100    43   838   112\n",
      "    50   670     2     9    35   480   284     5   150     4   172   112\n",
      "   167     2   336   385    39     4   172  4536  1111    17   546    38\n",
      "    13   447     4   192    50    16     6   147  2025    19    14    22\n",
      "     4  1920  4613   469     4    22    71    87    12    16    43   530\n",
      "    38    76    15    13  1247     4    22    17   515    17    12    16\n",
      "   626    18     2     5    62   386    12     8   316     8   106     5\n",
      "     4  2223  5244    16   480    66  3785    33     4   130    12    16\n",
      "    38   619     5    25   124    51    36   135    48    25  1415    33\n",
      "     6    22    12   215    28    77    52     5    14   407    16    82\n",
      " 10311     8     4   107   117  5952    15   256     4     2     7  3766\n",
      "     5   723    36    71    43   530   476    26   400   317    46     7\n",
      "     4 12118  1029    13   104    88     4   381    15   297    98    32\n",
      "  2071    56    26   141     6   194  7486    18     4   226    22    21\n",
      "   134   476    26   480     5   144    30  5535    18    51    36    28\n",
      "   224    92    25   104     4   226    65    16    38  1334    88    12\n",
      "    16   283     5    16  4472   113   103    32    15    16  5345    19\n",
      "   178    32     0     0     0     0     0     0     0     0     0     0\n",
      "     0     0     0     0     0     0     0     0     0     0     0     0\n",
      "     0     0     0     0     0     0     0     0     0     0     0     0\n",
      "     0     0     0     0]\n"
     ]
    }
   ],
   "source": [
    "train_data = keras.preprocessing.sequence.pad_sequences(train_data,\n",
    "                                                        value=word_index[\"<PAD>\"],\n",
    "                                                        padding='post',\n",
    "                                                        maxlen=256)\n",
    "\n",
    "test_data = keras.preprocessing.sequence.pad_sequences(test_data,\n",
    "                                                       value=word_index[\"<PAD>\"],\n",
    "                                                       padding='post',\n",
    "                                                       maxlen=256)\n",
    "len(train_data[0]), len(train_data[1])\n",
    "print(train_data[0])\n",
    "vocab_size = 15000"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " embedding (Embedding)       (None, None, 16)          240000    \n",
      "                                                                 \n",
      " global_average_pooling1d (G  (None, 16)               0         \n",
      " lobalAveragePooling1D)                                          \n",
      "                                                                 \n",
      " dense (Dense)               (None, 16)                272       \n",
      "                                                                 \n",
      " dropout (Dropout)           (None, 16)                0         \n",
      "                                                                 \n",
      " dense_1 (Dense)             (None, 1)                 17        \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 240,289\n",
      "Trainable params: 240,289\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model = keras.Sequential()\n",
    "model.add(keras.layers.Embedding(vocab_size, 16))\n",
    "model.add(keras.layers.GlobalAveragePooling1D())\n",
    "#model.add(keras.layers.GlobalMaxPooling1D())\n",
    "model.add(keras.layers.Dense(16, activation=tf.nn.relu))\n",
    "model.add(keras.layers.Dropout(0.5))\n",
    "model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))\n",
    "model.summary()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}